{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv('houses.csv',encoding='gbk')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.用 pandas 读取 csv 格式文件，并将所有字段的重复数据去除"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.drop_duplicates([\"title\", \"price\",\n",
    "                    \"xiaoqu_name\", \"size\",\n",
    "                    \"buildyear\", \"huxing\",\n",
    "                    \"chaoxiang\", \"zhuangxiu\",\n",
    "                    \"cenggao\", \"district_name\",\n",
    "                    \"sub_district_name\"],inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>_id</th>\n",
       "      <th>title</th>\n",
       "      <th>price</th>\n",
       "      <th>up_price</th>\n",
       "      <th>xiaoqu_name</th>\n",
       "      <th>size</th>\n",
       "      <th>buildyear</th>\n",
       "      <th>huxing</th>\n",
       "      <th>chaoxiang</th>\n",
       "      <th>zhuangxiu</th>\n",
       "      <th>cenggao</th>\n",
       "      <th>louxing</th>\n",
       "      <th>district_name</th>\n",
       "      <th>sub_district_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>5dd2198f18bba2b4a9a0b854</td>\n",
       "      <td>大华电梯两房/房型正气/开门南北通/房东诚意出售</td>\n",
       "      <td>690.0</td>\n",
       "      <td>76531</td>\n",
       "      <td>大华锦绣华城(十六街区)(公寓)</td>\n",
       "      <td>90.16</td>\n",
       "      <td>2010</td>\n",
       "      <td>2室2厅</td>\n",
       "      <td>南</td>\n",
       "      <td>简装</td>\n",
       "      <td>中楼层(共18层)</td>\n",
       "      <td>板楼</td>\n",
       "      <td>浦东</td>\n",
       "      <td>北蔡</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>5dd2198f18bba2b4a9a0b855</td>\n",
       "      <td>非底楼 满五年唯一 税费少 婚房装修 楼称佳 户型方正</td>\n",
       "      <td>330.0</td>\n",
       "      <td>52290</td>\n",
       "      <td>芳雅苑</td>\n",
       "      <td>63.11</td>\n",
       "      <td>1995</td>\n",
       "      <td>2室1厅</td>\n",
       "      <td>南</td>\n",
       "      <td>精装</td>\n",
       "      <td>低楼层(共6层)</td>\n",
       "      <td>板楼</td>\n",
       "      <td>浦东</td>\n",
       "      <td>北蔡</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>5dd2198f18bba2b4a9a0b856</td>\n",
       "      <td>满五唯一+7号线锦绣路+复式房+带阁楼+小区央位+精装</td>\n",
       "      <td>500.0</td>\n",
       "      <td>62878</td>\n",
       "      <td>锦博苑</td>\n",
       "      <td>79.52</td>\n",
       "      <td>2007</td>\n",
       "      <td>2室2厅</td>\n",
       "      <td>南</td>\n",
       "      <td>精装</td>\n",
       "      <td>高楼层(共6层)</td>\n",
       "      <td>板楼</td>\n",
       "      <td>浦东</td>\n",
       "      <td>北蔡</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>5dd2198f18bba2b4a9a0b857</td>\n",
       "      <td>13号线陈春路地铁400米中间楼层诚意卖看房方便</td>\n",
       "      <td>330.0</td>\n",
       "      <td>45866</td>\n",
       "      <td>鹏海小区</td>\n",
       "      <td>71.95</td>\n",
       "      <td>1997</td>\n",
       "      <td>2室1厅</td>\n",
       "      <td>南</td>\n",
       "      <td>简装</td>\n",
       "      <td>中楼层(共6层)</td>\n",
       "      <td>板楼</td>\n",
       "      <td>浦东</td>\n",
       "      <td>北蔡</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>5dd2198f18bba2b4a9a0b858</td>\n",
       "      <td>朝阳正气一房，采光好，坐看花园，户型方正，看房方便</td>\n",
       "      <td>460.0</td>\n",
       "      <td>83942</td>\n",
       "      <td>万邦都市花园</td>\n",
       "      <td>54.80</td>\n",
       "      <td>2004</td>\n",
       "      <td>1室1厅</td>\n",
       "      <td>南</td>\n",
       "      <td>简装</td>\n",
       "      <td>中楼层(共11层)</td>\n",
       "      <td>板楼</td>\n",
       "      <td>浦东</td>\n",
       "      <td>北蔡</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0                       _id                        title  price  \\\n",
       "0           0  5dd2198f18bba2b4a9a0b854     大华电梯两房/房型正气/开门南北通/房东诚意出售  690.0   \n",
       "1           1  5dd2198f18bba2b4a9a0b855  非底楼 满五年唯一 税费少 婚房装修 楼称佳 户型方正  330.0   \n",
       "2           2  5dd2198f18bba2b4a9a0b856  满五唯一+7号线锦绣路+复式房+带阁楼+小区央位+精装  500.0   \n",
       "3           3  5dd2198f18bba2b4a9a0b857     13号线陈春路地铁400米中间楼层诚意卖看房方便  330.0   \n",
       "4           4  5dd2198f18bba2b4a9a0b858    朝阳正气一房，采光好，坐看花园，户型方正，看房方便  460.0   \n",
       "\n",
       "   up_price        xiaoqu_name   size  buildyear huxing chaoxiang zhuangxiu  \\\n",
       "0     76531  大华锦绣华城(十六街区)(公寓)   90.16       2010  2室2厅         南        简装    \n",
       "1     52290               芳雅苑   63.11       1995  2室1厅         南        精装    \n",
       "2     62878               锦博苑   79.52       2007  2室2厅         南        精装    \n",
       "3     45866              鹏海小区   71.95       1997  2室1厅         南        简装    \n",
       "4     83942            万邦都市花园   54.80       2004  1室1厅         南        简装    \n",
       "\n",
       "       cenggao louxing district_name sub_district_name  \n",
       "0   中楼层(共18层)       板楼            浦东                北蔡  \n",
       "1    低楼层(共6层)       板楼            浦东                北蔡  \n",
       "2    高楼层(共6层)       板楼            浦东                北蔡  \n",
       "3    中楼层(共6层)       板楼            浦东                北蔡  \n",
       "4   中楼层(共11层)       板楼            浦东                北蔡  "
      ]
     },
     "execution_count": 129,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 69654 entries, 0 to 69653\n",
      "Data columns (total 15 columns):\n",
      " #   Column             Non-Null Count  Dtype  \n",
      "---  ------             --------------  -----  \n",
      " 0   Unnamed: 0         69654 non-null  int64  \n",
      " 1   _id                69654 non-null  object \n",
      " 2   title              69654 non-null  object \n",
      " 3   price              69654 non-null  float64\n",
      " 4   up_price           69654 non-null  int64  \n",
      " 5   xiaoqu_name        69654 non-null  object \n",
      " 6   size               69654 non-null  float64\n",
      " 7   buildyear          69654 non-null  int64  \n",
      " 8   huxing             69654 non-null  object \n",
      " 9   chaoxiang          69654 non-null  object \n",
      " 10  zhuangxiu          69654 non-null  object \n",
      " 11  cenggao            69654 non-null  object \n",
      " 12  louxing            69654 non-null  object \n",
      " 13  district_name      69654 non-null  object \n",
      " 14  sub_district_name  69654 non-null  object \n",
      "dtypes: float64(2), int64(3), object(10)\n",
      "memory usage: 8.5+ MB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>price</th>\n",
       "      <th>up_price</th>\n",
       "      <th>size</th>\n",
       "      <th>buildyear</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>69654.000000</td>\n",
       "      <td>69654.000000</td>\n",
       "      <td>69654.000000</td>\n",
       "      <td>69654.000000</td>\n",
       "      <td>69654.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>37500.170428</td>\n",
       "      <td>561.697387</td>\n",
       "      <td>55051.480906</td>\n",
       "      <td>97.111178</td>\n",
       "      <td>1999.823025</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>22800.348813</td>\n",
       "      <td>736.013020</td>\n",
       "      <td>23617.340821</td>\n",
       "      <td>75.445381</td>\n",
       "      <td>10.330195</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>30.000000</td>\n",
       "      <td>9123.000000</td>\n",
       "      <td>8.240000</td>\n",
       "      <td>1900.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>18337.250000</td>\n",
       "      <td>260.000000</td>\n",
       "      <td>39189.000000</td>\n",
       "      <td>55.910000</td>\n",
       "      <td>1994.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>36401.000000</td>\n",
       "      <td>375.000000</td>\n",
       "      <td>51408.500000</td>\n",
       "      <td>80.200000</td>\n",
       "      <td>2001.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>55397.750000</td>\n",
       "      <td>625.000000</td>\n",
       "      <td>66435.000000</td>\n",
       "      <td>112.670000</td>\n",
       "      <td>2007.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>96157.000000</td>\n",
       "      <td>35000.000000</td>\n",
       "      <td>527406.000000</td>\n",
       "      <td>1915.560000</td>\n",
       "      <td>2019.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Unnamed: 0         price       up_price          size     buildyear\n",
       "count  69654.000000  69654.000000   69654.000000  69654.000000  69654.000000\n",
       "mean   37500.170428    561.697387   55051.480906     97.111178   1999.823025\n",
       "std    22800.348813    736.013020   23617.340821     75.445381     10.330195\n",
       "min        0.000000     30.000000    9123.000000      8.240000   1900.000000\n",
       "25%    18337.250000    260.000000   39189.000000     55.910000   1994.000000\n",
       "50%    36401.000000    375.000000   51408.500000     80.200000   2001.000000\n",
       "75%    55397.750000    625.000000   66435.000000    112.670000   2007.000000\n",
       "max    96157.000000  35000.000000  527406.000000   1915.560000   2019.000000"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.请统计出总房源数量、总小区数量、总版块数量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [],
   "source": [
    "house_num = len(data['_id'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "69654"
      ]
     },
     "execution_count": 133,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "house_num"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [],
   "source": [
    "xiaoqu_num = len(data['xiaoqu_name'].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8333"
      ]
     },
     "execution_count": 135,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xiaoqu_num"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {},
   "outputs": [],
   "source": [
    "sub_district_name = len(data['sub_district_name'].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "165"
      ]
     },
     "execution_count": 137,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sub_district_name"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.请统计出有哪些不同的户型、有哪些不同的装修情况、有哪些不同的建造年份"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [],
   "source": [
    "huxing_num = data['huxing'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['2室2厅 ', '2室1厅 ', '1室1厅 ', '4室3厅 ', '3室1厅 ', '3室2厅 ', '4室2厅 ',\n",
       "       '1室2厅 ', '2室0厅 ', '5室1厅 ', '1室0厅 ', '5室3厅 ', '5室2厅 ', '4室1厅 ',\n",
       "       '4室4厅 ', '6室3厅 ', '5室6厅 ', '6室2厅 ', '5室4厅 ', '7室3厅 ', '3室3厅 ',\n",
       "       '9室2厅 ', '7室2厅 ', '7室1厅 ', '6室4厅 ', '6室1厅 ', '8室4厅 ', '3室0厅 ',\n",
       "       '7室4厅 ', '7室5厅 ', '4室5厅 ', '9室6厅 ', '9室4厅 ', '6室5厅 ', '10室1厅 ',\n",
       "       '4室0厅 ', '8室1厅 ', '9室3厅 ', '9室9厅 ', '5室5厅 ', '8室3厅 ', '2室3厅 ',\n",
       "       '7室6厅 ', '8室2厅 ', '0室0厅 ', '6室6厅 ', '10室6厅 ', '2室4厅 ', '6室0厅 ',\n",
       "       '10室2厅 ', '7室0厅 ', '3室4厅 ', '8室5厅 ', '10室3厅 ', '5室0厅 ', '3室5厅 '],\n",
       "      dtype=object)"
      ]
     },
     "execution_count": 139,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "huxing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [],
   "source": [
    "zhuangxiu = data['zhuangxiu'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([' 简装 ', ' 精装 ', ' 毛坯 ', ' 其他 '], dtype=object)"
      ]
     },
     "execution_count": 141,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "zhuangxiu"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [],
   "source": [
    "buildyear = data['buildyear'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2010, 1995, 2007, 1997, 2004, 1994, 2011, 2012, 1998, 2013, 1999,\n",
       "       2016, 2000, 2005, 1996, 2003, 2015, 2006, 2009, 1993, 2002, 2014,\n",
       "       1992, 2008, 2001, 2019, 2018, 1987, 1991, 2017, 1989, 1988, 1982,\n",
       "       1983, 1984, 1981, 1985, 1977, 1986, 1990, 1979, 1980, 1953, 1976,\n",
       "       1958, 1978, 1969, 1974, 1975, 1955, 1971, 1954, 1956, 1959, 1968,\n",
       "       1964, 1960, 1967, 1972, 1961, 1973, 1936, 1911, 1941, 1948, 1949,\n",
       "       1939, 1933, 1940, 1946, 1937, 1910, 1945, 1957, 1965, 1966, 1970,\n",
       "       1963, 1950, 1962, 1930, 1920, 1947, 1938, 1934, 1935, 1921, 1926,\n",
       "       1912, 1924, 1928, 1931, 1943, 1900, 1952, 1951], dtype=int64)"
      ]
     },
     "execution_count": 143,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "buildyear"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4.请计算出上海二手房每平方米均价"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [],
   "source": [
    "price_mean = np.mean(data['up_price'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "55051.4809056192"
      ]
     },
     "execution_count": 145,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "price_mean"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5.请统计出不同户型对应的房源数量、平均面积；并从大到小进行排序，判断出主流户型是什么？平均面积多大？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>huxing</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2室1厅</th>\n",
       "      <td>17364</td>\n",
       "      <td>66.915066</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2室2厅</th>\n",
       "      <td>12990</td>\n",
       "      <td>93.016552</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3室2厅</th>\n",
       "      <td>12618</td>\n",
       "      <td>129.362787</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1室1厅</th>\n",
       "      <td>12103</td>\n",
       "      <td>46.955651</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4室2厅</th>\n",
       "      <td>3613</td>\n",
       "      <td>208.867312</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3室1厅</th>\n",
       "      <td>3451</td>\n",
       "      <td>91.561336</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1室0厅</th>\n",
       "      <td>1914</td>\n",
       "      <td>36.206541</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2室0厅</th>\n",
       "      <td>1369</td>\n",
       "      <td>49.615223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1室2厅</th>\n",
       "      <td>1192</td>\n",
       "      <td>67.410990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5室2厅</th>\n",
       "      <td>855</td>\n",
       "      <td>286.614175</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5室3厅</th>\n",
       "      <td>584</td>\n",
       "      <td>360.780565</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4室3厅</th>\n",
       "      <td>566</td>\n",
       "      <td>272.177951</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6室3厅</th>\n",
       "      <td>181</td>\n",
       "      <td>395.391215</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6室2厅</th>\n",
       "      <td>147</td>\n",
       "      <td>289.408435</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4室1厅</th>\n",
       "      <td>145</td>\n",
       "      <td>154.547586</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3室3厅</th>\n",
       "      <td>134</td>\n",
       "      <td>175.504030</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3室0厅</th>\n",
       "      <td>107</td>\n",
       "      <td>68.571869</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5室4厅</th>\n",
       "      <td>51</td>\n",
       "      <td>403.056667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6室4厅</th>\n",
       "      <td>49</td>\n",
       "      <td>516.186122</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7室3厅</th>\n",
       "      <td>36</td>\n",
       "      <td>461.077222</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5室1厅</th>\n",
       "      <td>32</td>\n",
       "      <td>225.869375</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7室2厅</th>\n",
       "      <td>30</td>\n",
       "      <td>310.042667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4室4厅</th>\n",
       "      <td>21</td>\n",
       "      <td>265.587143</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6室1厅</th>\n",
       "      <td>11</td>\n",
       "      <td>170.230909</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7室4厅</th>\n",
       "      <td>9</td>\n",
       "      <td>535.334444</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8室2厅</th>\n",
       "      <td>9</td>\n",
       "      <td>366.875556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2室3厅</th>\n",
       "      <td>8</td>\n",
       "      <td>118.597500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6室5厅</th>\n",
       "      <td>7</td>\n",
       "      <td>669.482857</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8室3厅</th>\n",
       "      <td>6</td>\n",
       "      <td>474.710000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7室5厅</th>\n",
       "      <td>5</td>\n",
       "      <td>532.396000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8室4厅</th>\n",
       "      <td>5</td>\n",
       "      <td>622.384000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9室2厅</th>\n",
       "      <td>4</td>\n",
       "      <td>757.650000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0室0厅</th>\n",
       "      <td>4</td>\n",
       "      <td>237.622500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9室4厅</th>\n",
       "      <td>3</td>\n",
       "      <td>409.463333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4室0厅</th>\n",
       "      <td>3</td>\n",
       "      <td>192.590000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5室6厅</th>\n",
       "      <td>3</td>\n",
       "      <td>344.720000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6室0厅</th>\n",
       "      <td>2</td>\n",
       "      <td>504.425000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5室5厅</th>\n",
       "      <td>2</td>\n",
       "      <td>334.585000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9室6厅</th>\n",
       "      <td>2</td>\n",
       "      <td>473.775000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7室1厅</th>\n",
       "      <td>2</td>\n",
       "      <td>177.200000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7室6厅</th>\n",
       "      <td>2</td>\n",
       "      <td>406.005000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2室4厅</th>\n",
       "      <td>1</td>\n",
       "      <td>129.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9室3厅</th>\n",
       "      <td>1</td>\n",
       "      <td>593.010000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10室2厅</th>\n",
       "      <td>1</td>\n",
       "      <td>370.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8室5厅</th>\n",
       "      <td>1</td>\n",
       "      <td>1663.100000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10室3厅</th>\n",
       "      <td>1</td>\n",
       "      <td>598.320000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10室6厅</th>\n",
       "      <td>1</td>\n",
       "      <td>473.610000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3室4厅</th>\n",
       "      <td>1</td>\n",
       "      <td>94.370000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8室1厅</th>\n",
       "      <td>1</td>\n",
       "      <td>386.170000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3室5厅</th>\n",
       "      <td>1</td>\n",
       "      <td>376.210000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4室5厅</th>\n",
       "      <td>1</td>\n",
       "      <td>321.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7室0厅</th>\n",
       "      <td>1</td>\n",
       "      <td>161.440000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6室6厅</th>\n",
       "      <td>1</td>\n",
       "      <td>1440.160000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5室0厅</th>\n",
       "      <td>1</td>\n",
       "      <td>89.600000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10室1厅</th>\n",
       "      <td>1</td>\n",
       "      <td>544.990000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9室9厅</th>\n",
       "      <td>1</td>\n",
       "      <td>1352.240000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        count         mean\n",
       "huxing                    \n",
       "2室1厅    17364    66.915066\n",
       "2室2厅    12990    93.016552\n",
       "3室2厅    12618   129.362787\n",
       "1室1厅    12103    46.955651\n",
       "4室2厅     3613   208.867312\n",
       "3室1厅     3451    91.561336\n",
       "1室0厅     1914    36.206541\n",
       "2室0厅     1369    49.615223\n",
       "1室2厅     1192    67.410990\n",
       "5室2厅      855   286.614175\n",
       "5室3厅      584   360.780565\n",
       "4室3厅      566   272.177951\n",
       "6室3厅      181   395.391215\n",
       "6室2厅      147   289.408435\n",
       "4室1厅      145   154.547586\n",
       "3室3厅      134   175.504030\n",
       "3室0厅      107    68.571869\n",
       "5室4厅       51   403.056667\n",
       "6室4厅       49   516.186122\n",
       "7室3厅       36   461.077222\n",
       "5室1厅       32   225.869375\n",
       "7室2厅       30   310.042667\n",
       "4室4厅       21   265.587143\n",
       "6室1厅       11   170.230909\n",
       "7室4厅        9   535.334444\n",
       "8室2厅        9   366.875556\n",
       "2室3厅        8   118.597500\n",
       "6室5厅        7   669.482857\n",
       "8室3厅        6   474.710000\n",
       "7室5厅        5   532.396000\n",
       "8室4厅        5   622.384000\n",
       "9室2厅        4   757.650000\n",
       "0室0厅        4   237.622500\n",
       "9室4厅        3   409.463333\n",
       "4室0厅        3   192.590000\n",
       "5室6厅        3   344.720000\n",
       "6室0厅        2   504.425000\n",
       "5室5厅        2   334.585000\n",
       "9室6厅        2   473.775000\n",
       "7室1厅        2   177.200000\n",
       "7室6厅        2   406.005000\n",
       "2室4厅        1   129.400000\n",
       "9室3厅        1   593.010000\n",
       "10室2厅       1   370.000000\n",
       "8室5厅        1  1663.100000\n",
       "10室3厅       1   598.320000\n",
       "10室6厅       1   473.610000\n",
       "3室4厅        1    94.370000\n",
       "8室1厅        1   386.170000\n",
       "3室5厅        1   376.210000\n",
       "4室5厅        1   321.000000\n",
       "7室0厅        1   161.440000\n",
       "6室6厅        1  1440.160000\n",
       "5室0厅        1    89.600000\n",
       "10室1厅       1   544.990000\n",
       "9室9厅        1  1352.240000"
      ]
     },
     "execution_count": 146,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['size'].groupby([data['huxing']]).agg(['count','mean']).sort_values(by='count',ascending=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6.获取每个小区的房源数量，并按从大到小的顺序排序"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "上海康城           216\n",
       "同润菲诗艾伦         117\n",
       "中远两湾城          114\n",
       "金地艺境(宝山)       106\n",
       "美岸栖庭(公寓)       106\n",
       "              ... \n",
       "康平路207弄          1\n",
       "共和新路2193号        1\n",
       "康定路841号乙         1\n",
       "东方曼哈顿(尚东区)       1\n",
       "岳阳花苑             1\n",
       "Name: xiaoqu_name, Length: 8333, dtype: int64"
      ]
     },
     "execution_count": 147,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['xiaoqu_name'].value_counts().sort_values(ascending=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7.请统计出上海每个小区每平方米的均价"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "xiaoqu_name\n",
       "常熟路182号      527406.000000\n",
       "良友别墅         393443.000000\n",
       "霞飞别墅         369767.000000\n",
       "复兴西路299弄     356385.666667\n",
       "兴国路322号      318560.500000\n",
       "                 ...      \n",
       "御景龙庭玲珑公馆      13226.000000\n",
       "临潮二村          13093.000000\n",
       "华鼎大楼          13055.000000\n",
       "万寿新村(金山)      12680.000000\n",
       "石化一村          12144.000000\n",
       "Name: up_price, Length: 8333, dtype: float64"
      ]
     },
     "execution_count": 148,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['up_price'].groupby([data['xiaoqu_name']]).mean().sort_values(ascending=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8.请统计出上海二手房总价排在前 5 位的房源，看看超级豪宅有多壕？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>_id</th>\n",
       "      <th>title</th>\n",
       "      <th>price</th>\n",
       "      <th>up_price</th>\n",
       "      <th>xiaoqu_name</th>\n",
       "      <th>size</th>\n",
       "      <th>buildyear</th>\n",
       "      <th>huxing</th>\n",
       "      <th>chaoxiang</th>\n",
       "      <th>zhuangxiu</th>\n",
       "      <th>cenggao</th>\n",
       "      <th>louxing</th>\n",
       "      <th>district_name</th>\n",
       "      <th>sub_district_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>46396</th>\n",
       "      <td>48887</td>\n",
       "      <td>5dd22309303c5558c090e6d9</td>\n",
       "      <td>花园18亩 开门见3座山 背面靠山 南东2面临天然大河</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>191890</td>\n",
       "      <td>世茂佘山庄园</td>\n",
       "      <td>1823.97</td>\n",
       "      <td>2008</td>\n",
       "      <td>6室3厅</td>\n",
       "      <td>南</td>\n",
       "      <td>毛坯</td>\n",
       "      <td>2层</td>\n",
       "      <td>板楼</td>\n",
       "      <td>松江</td>\n",
       "      <td>佘山</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46451</th>\n",
       "      <td>48943</td>\n",
       "      <td>5dd22309303c5558c090e711</td>\n",
       "      <td>仟万精装、华庭岛屿大独栋！5亩占地、自住！价格可谈</td>\n",
       "      <td>30000.0</td>\n",
       "      <td>189672</td>\n",
       "      <td>佘山高尔夫郡</td>\n",
       "      <td>1581.68</td>\n",
       "      <td>2009</td>\n",
       "      <td>6室3厅</td>\n",
       "      <td>南 北</td>\n",
       "      <td>精装</td>\n",
       "      <td>2层</td>\n",
       "      <td>板楼</td>\n",
       "      <td>松江</td>\n",
       "      <td>佘山</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18891</th>\n",
       "      <td>19869</td>\n",
       "      <td>5dd21bf518bba2b4a9a1061f</td>\n",
       "      <td>罕见！占地10亩庄园，整个园区的King！让上海都在仰望</td>\n",
       "      <td>30000.0</td>\n",
       "      <td>156613</td>\n",
       "      <td>绿城玫瑰园</td>\n",
       "      <td>1915.56</td>\n",
       "      <td>2013</td>\n",
       "      <td>6室4厅</td>\n",
       "      <td>南</td>\n",
       "      <td>毛坯</td>\n",
       "      <td>2层</td>\n",
       "      <td>板楼</td>\n",
       "      <td>闵行</td>\n",
       "      <td>马桥</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28332</th>\n",
       "      <td>29721</td>\n",
       "      <td>5dd21d0718bba2b4a9a12cb3</td>\n",
       "      <td>安福路/常熟路 沿街独幢花园洋房，英式风格 地价已补</td>\n",
       "      <td>24999.0</td>\n",
       "      <td>527406</td>\n",
       "      <td>常熟路182号</td>\n",
       "      <td>474.00</td>\n",
       "      <td>1937</td>\n",
       "      <td>6室3厅</td>\n",
       "      <td>南</td>\n",
       "      <td>精装</td>\n",
       "      <td>低楼层(共3层)</td>\n",
       "      <td>板塔结合</td>\n",
       "      <td>徐汇</td>\n",
       "      <td>衡山路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18875</th>\n",
       "      <td>19853</td>\n",
       "      <td>5dd21bf418bba2b4a9a1060f</td>\n",
       "      <td>（岛宅主推）占地8亩上产证 两面邻水靠大河 景观位置</td>\n",
       "      <td>21000.0</td>\n",
       "      <td>128214</td>\n",
       "      <td>绿城玫瑰园</td>\n",
       "      <td>1637.89</td>\n",
       "      <td>2014</td>\n",
       "      <td>6室4厅</td>\n",
       "      <td>南</td>\n",
       "      <td>毛坯</td>\n",
       "      <td>2层</td>\n",
       "      <td>板楼</td>\n",
       "      <td>闵行</td>\n",
       "      <td>马桥</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Unnamed: 0                       _id                         title  \\\n",
       "46396       48887  5dd22309303c5558c090e6d9   花园18亩 开门见3座山 背面靠山 南东2面临天然大河   \n",
       "46451       48943  5dd22309303c5558c090e711     仟万精装、华庭岛屿大独栋！5亩占地、自住！价格可谈   \n",
       "18891       19869  5dd21bf518bba2b4a9a1061f  罕见！占地10亩庄园，整个园区的King！让上海都在仰望   \n",
       "28332       29721  5dd21d0718bba2b4a9a12cb3    安福路/常熟路 沿街独幢花园洋房，英式风格 地价已补   \n",
       "18875       19853  5dd21bf418bba2b4a9a1060f    （岛宅主推）占地8亩上产证 两面邻水靠大河 景观位置   \n",
       "\n",
       "         price  up_price xiaoqu_name     size  buildyear huxing chaoxiang  \\\n",
       "46396  35000.0    191890     世茂佘山庄园   1823.97       2008  6室3厅         南    \n",
       "46451  30000.0    189672     佘山高尔夫郡   1581.68       2009  6室3厅       南 北    \n",
       "18891  30000.0    156613      绿城玫瑰园   1915.56       2013  6室4厅         南    \n",
       "28332  24999.0    527406    常熟路182号    474.00       1937  6室3厅         南    \n",
       "18875  21000.0    128214      绿城玫瑰园   1637.89       2014  6室4厅         南    \n",
       "\n",
       "      zhuangxiu     cenggao louxing district_name sub_district_name  \n",
       "46396       毛坯          2层      板楼             松江                佘山  \n",
       "46451       精装          2层      板楼             松江                佘山  \n",
       "18891       毛坯          2层      板楼             闵行                马桥  \n",
       "28332       精装    低楼层(共3层)     板塔结合            徐汇               衡山路  \n",
       "18875       毛坯          2层      板楼             闵行                马桥  "
      ]
     },
     "execution_count": 149,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.sort_values(by='price',ascending=False).head(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 9.请计算出不同年代的房子数量（10年一个区间）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [],
   "source": [
    "data['buildyear'] = data['buildyear'].astype(float).fillna(0.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {},
   "outputs": [],
   "source": [
    "data['buildyear'] = pd.to_datetime(data['buildyear'],format='%Y')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "buildyear\n",
       "1900-12-31        1\n",
       "1910-12-31        1\n",
       "1920-12-31        7\n",
       "1930-12-31       36\n",
       "1940-12-31      112\n",
       "1950-12-31       40\n",
       "1960-12-31      425\n",
       "1970-12-31      234\n",
       "1980-12-31     1282\n",
       "1990-12-31     8536\n",
       "2000-12-31    23747\n",
       "2010-12-31    25136\n",
       "2020-12-31    10097\n",
       "Freq: 10A-DEC, Name: buildyear, dtype: int64"
      ]
     },
     "execution_count": 152,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['buildyear'].groupby(data['buildyear']).count().resample('10A').sum()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
